Comparison SpatialDE genes

Comparison of top significant genes detected by SpatialDE (from Stephanie’s script sce_spatialDE.Rmd) vs. our previous set of highly variable genes (HVGs).

Note that HVGs were previously calculated from all samples combined.

SpatialDE genes were calculated from sample 151673 only, with subsampling to 1500 spots due to slow runtime for SpatialDE (SpatialDE does not scale well with number of spots).

suppressPackageStartupMessages(library(SingleCellExperiment))
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(scales))

Load data: HVGs

Load original object containing HVGs, which were calculated from all samples combined.

# load scran output file
load("../../data/Human_DLPFC_Visium_processedData_sce_scran.Rdata")
sce
## class: SingleCellExperiment 
## dim: 33538 47681 
## metadata(1): image
## assays(2): counts logcounts
## rownames(33538): ENSG00000243485 ENSG00000237613 ... ENSG00000277475
##   ENSG00000268674
## rowData names(5): gene_id gene_version gene_name gene_source
##   gene_biotype
## colnames(47681): AAACAACGAATAGTTC-1 AAACAAGTATCTCCCA-1 ...
##   TTGTTTCCATACAACT-1 TTGTTTGTGTAAATTC-1
## colData names(19): barcode sample_name ... key cell_count
## reducedDimNames(6): PCA TSNE_perplexity50 ... TSNE_perplexity80
##   UMAP_neighbors15
## spikeNames(0):
## altExpNames(0):
# vector of top HVGs
head(top.hvgs)
## [1] "ENSG00000110484" "ENSG00000123560" "ENSG00000197971" "ENSG00000131095"
## [5] "ENSG00000124935" "ENSG00000211592"

Load data: SpatialDE

# load spreadsheet containing SpatialDE results
spatialDE_results <- read_csv("../../data/Human_DLPFC_Visium_processedData_sce_scran_spatialDE_results.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   FSV = col_double(),
##   M = col_double(),
##   g = col_character(),
##   l = col_double(),
##   max_delta = col_double(),
##   max_ll = col_double(),
##   max_mu_hat = col_double(),
##   max_s2_t_hat = col_double(),
##   model = col_character(),
##   n = col_double(),
##   s2_FSV = col_double(),
##   s2_logdelta = col_double(),
##   time = col_double(),
##   BIC = col_double(),
##   max_ll_null = col_double(),
##   LLR = col_double(),
##   pval = col_double(),
##   qval = col_double()
## )

Comparison of gene lists

Compare list of HVGs vs. list of significant genes from SpatialDE.

# select significant genes from SpatialDE
spatialDE_sig <- filter(spatialDE_results, qval < 0.05)
spatialDE_sig_genes <- spatialDE_sig$g

head(spatialDE_sig_genes)
## [1] "ENSG00000158286" "ENSG00000175206" "ENSG00000163909" "ENSG00000173218"
## [5] "ENSG00000272824" "ENSG00000256029"
length(spatialDE_sig_genes)
## [1] 1924
# compare HVGs vs. significant genes from SpatialDE
head(top.hvgs)
## [1] "ENSG00000110484" "ENSG00000123560" "ENSG00000197971" "ENSG00000131095"
## [5] "ENSG00000124935" "ENSG00000211592"
head(spatialDE_sig_genes)
## [1] "ENSG00000158286" "ENSG00000175206" "ENSG00000163909" "ENSG00000173218"
## [5] "ENSG00000272824" "ENSG00000256029"
length(top.hvgs)
## [1] 1942
length(spatialDE_sig_genes)
## [1] 1924
sum(spatialDE_sig_genes %in% top.hvgs)
## [1] 742
sum(top.hvgs %in% spatialDE_sig_genes)
## [1] 742

SpatialDE results

Histogram of p-values. Note that a large number of genes have q-values exactly equal to zero.

hist(spatialDE_sig$qval)

# number of genes with q-value equal to 0
sum(spatialDE_sig$qval == 0)
## [1] 387

Plots: SpatialDE

Create some plots showing expression (UMI counts) of the top significant genes from Spatial DE, for sample 151673.

Note: since there are 387 genes with q-values exactly equal to 0, we plot a random set of 10 of these.

# select spots from sample 151673 only
ix_151673 <- colData(sce)$sample_name == 151673
table(ix_151673)
## ix_151673
## FALSE  TRUE 
## 44042  3639
sce_151673 <- sce[, ix_151673]
sce_151673
## class: SingleCellExperiment 
## dim: 33538 3639 
## metadata(1): image
## assays(2): counts logcounts
## rownames(33538): ENSG00000243485 ENSG00000237613 ... ENSG00000277475
##   ENSG00000268674
## rowData names(5): gene_id gene_version gene_name gene_source
##   gene_biotype
## colnames(3639): AAACAAGTATCTCCCA-1 AAACAATCTACTAGCA-1 ...
##   TTGTTTGTATTACACG-1 TTGTTTGTGTAAATTC-1
## colData names(19): barcode sample_name ... key cell_count
## reducedDimNames(6): PCA TSNE_perplexity50 ... TSNE_perplexity80
##   UMAP_neighbors15
## spikeNames(0):
## altExpNames(0):
# extract x-y coordinates of spots (note: y coordinate is reversed)
xy_coords <- data.frame(
    x_coord = colData(sce_151673)[, c("imagecol")], 
    y_coord = -colData(sce_151673)[, c("imagerow")]
)

# select top significant genes from SpatialDE
spatialDE_sig <- mutate(spatialDE_sig, rank = rank(qval, ties.method = "first"))
n_top <- 10
spatialDE_top <- filter(spatialDE_sig, rank <= n_top)
spatialDE_top
## # A tibble: 10 x 20
##       X1   FSV     M g         l max_delta max_ll max_mu_hat max_s2_t_hat model
##    <dbl> <dbl> <dbl> <chr> <dbl>     <dbl>  <dbl>      <dbl>        <dbl> <chr>
##  1  8326 0.989     4 ENSG…  5.83 0.0113     3146.      0.504       0.0922 SE   
##  2  8352 1.000     4 ENSG…  5.83 0.0000454  4792.      0.503       0.0912 SE   
##  3  8364 1.000     4 ENSG…  5.83 0.0000454  4779.      0.499       0.0897 SE   
##  4  8365 1.000     4 ENSG…  5.83 0.0000454  4033.      0.502       0.0911 SE   
##  5  8368 1.000     4 ENSG…  5.83 0.0000454  4092.      0.497       0.0892 SE   
##  6  8373 1.000     4 ENSG…  5.83 0.0000454  3766.      0.509       0.0937 SE   
##  7  8389 1.000     4 ENSG…  5.83 0.0000454  4055.      0.499       0.0901 SE   
##  8  8394 1.000     4 ENSG…  5.83 0.0000454  4743.      0.499       0.0898 SE   
##  9  8408 1.000     4 ENSG…  5.83 0.0000454  4087.      0.504       0.0918 SE   
## 10  8412 1.000     4 ENSG…  5.83 0.0000454  4051.      0.497       0.0892 SE   
## # … with 10 more variables: n <dbl>, s2_FSV <dbl>, s2_logdelta <dbl>,
## #   time <dbl>, BIC <dbl>, max_ll_null <dbl>, LLR <dbl>, pval <dbl>,
## #   qval <dbl>, rank <int>
spatialDE_top_genes <- spatialDE_top$g
spatialDE_top_genes
##  [1] "ENSG00000122420" "ENSG00000272721" "ENSG00000182952" "ENSG00000271754"
##  [5] "ENSG00000229921" "ENSG00000228434" "ENSG00000280195" "ENSG00000253196"
##  [9] "ENSG00000285254" "ENSG00000270087"
# get expression levels (UMI counts) for top significant genes from SpatialDE
exprs_151673_spatialDE_top <- counts(sce_151673)[spatialDE_top_genes, ]
dim(exprs_151673_spatialDE_top)
## [1]   10 3639
# match to spots and set up data frame for ggplot2
stopifnot(all(colData(sce_151673)$barcode == rownames(t(exprs_151673_spatialDE_top))))
stopifnot(length(colData(sce_151673)$barcode) == length(rownames(t(exprs_151673_spatialDE_top))))

d_plot <- cbind(
    barcode = colData(sce_151673)$barcode, 
    xy_coords, 
    as.data.frame(as.matrix(t(exprs_151673_spatialDE_top)))
)

head(d_plot)
##                               barcode  x_coord   y_coord ENSG00000122420
## AAACAAGTATCTCCCA-1 AAACAAGTATCTCCCA-1 440.6391 -381.0981               0
## AAACAATCTACTAGCA-1 AAACAATCTACTAGCA-1 259.6310 -126.3276               0
## AAACACCAATAACTGC-1 AAACACCAATAACTGC-1 183.0783 -427.7678               0
## AAACAGAGCGACTCCT-1 AAACAGAGCGACTCCT-1 417.2367 -186.8137               0
## AAACAGCTTTCAGAAG-1 AAACAGCTTTCAGAAG-1 152.7003 -341.2691               0
## AAACAGGGTCTATATT-1 AAACAGGGTCTATATT-1 164.9415 -362.9163               0
##                    ENSG00000272721 ENSG00000182952 ENSG00000271754
## AAACAAGTATCTCCCA-1               0               0               0
## AAACAATCTACTAGCA-1               0               0               0
## AAACACCAATAACTGC-1               0               0               0
## AAACAGAGCGACTCCT-1               0               0               0
## AAACAGCTTTCAGAAG-1               0               0               0
## AAACAGGGTCTATATT-1               0               0               0
##                    ENSG00000229921 ENSG00000228434 ENSG00000280195
## AAACAAGTATCTCCCA-1               0               0               0
## AAACAATCTACTAGCA-1               0               0               0
## AAACACCAATAACTGC-1               0               0               0
## AAACAGAGCGACTCCT-1               0               0               0
## AAACAGCTTTCAGAAG-1               0               0               0
## AAACAGGGTCTATATT-1               0               0               0
##                    ENSG00000253196 ENSG00000285254 ENSG00000270087
## AAACAAGTATCTCCCA-1               0               0               0
## AAACAATCTACTAGCA-1               0               0               0
## AAACACCAATAACTGC-1               0               0               0
## AAACAGAGCGACTCCT-1               0               0               0
## AAACAGCTTTCAGAAG-1               0               0               0
## AAACAGGGTCTATATT-1               0               0               0
d_plot <- melt(
    d_plot, 
    id.vars = c("barcode", "x_coord", "y_coord"), 
    variable.name = "gene_id", 
    value.name = "UMIs"
)

head(d_plot)
##              barcode  x_coord   y_coord         gene_id UMIs
## 1 AAACAAGTATCTCCCA-1 440.6391 -381.0981 ENSG00000122420    0
## 2 AAACAATCTACTAGCA-1 259.6310 -126.3276 ENSG00000122420    0
## 3 AAACACCAATAACTGC-1 183.0783 -427.7678 ENSG00000122420    0
## 4 AAACAGAGCGACTCCT-1 417.2367 -186.8137 ENSG00000122420    0
## 5 AAACAGCTTTCAGAAG-1 152.7003 -341.2691 ENSG00000122420    0
## 6 AAACAGGGTCTATATT-1 164.9415 -362.9163 ENSG00000122420    0
# generate plots
ggplot(d_plot, aes(x = x_coord, y = y_coord, color = UMIs)) + 
    facet_wrap(~ gene_id) + 
    geom_point(size = 0.8, alpha = 1) + 
    scale_color_gradient(low = "gray90", high = "red") + 
    coord_fixed() + 
    theme_bw() + 
    ggtitle("UMIs of top SpatialDE genes for sample 151673")

filename <- "../plots/spatialDE/spatialDE_top_genes_151673.png"
ggsave(filename, width = 15, height = 13)

Plots: known marker genes

Compare to plots showing expression (UMI counts) of some known marker genes.

Currently using marker genes SNAP25, MOBP, and PCP4 (from Kristen Maynard’s slide presentation).

Could also compare with expression of some of the top HVGs (however would need q-values for the HVGs to do this).

# choose marker genes and get expression levels (UMI counts)
marker_genes <- c("SNAP25", "MOBP", "PCP4")

ix_marker_genes <- match(marker_genes, rowData(sce_151673)$gene_name)
exprs_marker_genes <- counts(sce_151673)[ix_marker_genes, ]

# use original gene names
rownames(exprs_marker_genes) <- marker_genes
dim(exprs_marker_genes)
## [1]    3 3639
# match to spots and set up data frame for ggplot2
stopifnot(all(colData(sce_151673)$barcode == rownames(t(exprs_marker_genes))))
stopifnot(length(colData(sce_151673)$barcode) == length(rownames(t(exprs_marker_genes))))

d_plot <- cbind(
    barcode = colData(sce_151673)$barcode, 
    xy_coords, 
    as.data.frame(as.matrix(t(exprs_marker_genes)))
)

head(d_plot)
##                               barcode  x_coord   y_coord SNAP25 MOBP PCP4
## AAACAAGTATCTCCCA-1 AAACAAGTATCTCCCA-1 440.6391 -381.0981     16    1    0
## AAACAATCTACTAGCA-1 AAACAATCTACTAGCA-1 259.6310 -126.3276      8    0    1
## AAACACCAATAACTGC-1 AAACACCAATAACTGC-1 183.0783 -427.7678      4    7    0
## AAACAGAGCGACTCCT-1 AAACAGAGCGACTCCT-1 417.2367 -186.8137     17    2    1
## AAACAGCTTTCAGAAG-1 AAACAGCTTTCAGAAG-1 152.7003 -341.2691     10    1    0
## AAACAGGGTCTATATT-1 AAACAGGGTCTATATT-1 164.9415 -362.9163     14    3    2
d_plot <- melt(
    d_plot, 
    id.vars = c("barcode", "x_coord", "y_coord"), 
    variable.name = "gene_id", 
    value.name = "UMIs"
)

head(d_plot)
##              barcode  x_coord   y_coord gene_id UMIs
## 1 AAACAAGTATCTCCCA-1 440.6391 -381.0981  SNAP25   16
## 2 AAACAATCTACTAGCA-1 259.6310 -126.3276  SNAP25    8
## 3 AAACACCAATAACTGC-1 183.0783 -427.7678  SNAP25    4
## 4 AAACAGAGCGACTCCT-1 417.2367 -186.8137  SNAP25   17
## 5 AAACAGCTTTCAGAAG-1 152.7003 -341.2691  SNAP25   10
## 6 AAACAGGGTCTATATT-1 164.9415 -362.9163  SNAP25   14
# generate plots
ggplot(d_plot, aes(x = x_coord, y = y_coord, color = UMIs)) + 
    facet_wrap(~ gene_id) + 
    geom_point(size = 0.8, alpha = 1) + 
    scale_color_gradientn(colors = c("gray90", "red", "blue"), values = rescale(c(0, 20, 85))) + 
    coord_fixed() + 
    theme_bw() + 
    ggtitle("UMIs of known marker genes for sample 151673")

filename <- "../plots/spatialDE/marker_genes_151673.png"
ggsave(filename, width = 11, height = 5)